Analysis of GLDS-120 from NASA GeneLab

This R Markdown file was auto-generated by the iDEP website using iDEP 0.91, originally by Steven Xijin Ge (Xijin.Ge@sdstate.edu).

Ge SX, Son EW, Yao R: iDEP: an integrated web application for differential expression and pathway analysis of RNA-Seq data. BMC Bioinformatics 2018, 19(1):534. PMID:30567491

1. Read data

First we set up the working directory to where the files are saved.

# Biclustering method selected for this report. The package-loading code below
# compares this string against "BCQU()" and "BCUnibic()" to decide whether the
# optional QUBIC / runibic packages have to be attached.
input_biclustMethod_ <- "BCCC()"

R packages used

library(RSQLite, verbose = FALSE) # for database connection
library(gplots, verbose = FALSE) # for hierarchical clustering
library(ggplot2, verbose = FALSE) # graphics
library(e1071, verbose = FALSE) # computing kurtosis
#library(DT, verbose = FALSE) # for renderDataTable
library(plotly, verbose = FALSE) # for interactive heatmap
library(reshape2, verbose = FALSE) # for melt correlation matrix in heatmap

# From Data Read Function
library(edgeR, verbose = FALSE) # count data D.E.
library(DESeq2, verbose = FALSE) # count data analysis, DEG.DESeq2

# TSNE Plot, tSNEgenePlot
library(Rtsne, verbose = FALSE)

# PGSA Pathway PGSEA Pathway, PGSEAplot
library(PGSEA, verbose = FALSE)
## Warning: Package 'KEGG.db' is deprecated and will be removed from Bioconductor
##   version 3.12
# DEG.limma
library(limma, verbose = FALSE) # Differential expression
library(statmod, verbose = FALSE)

# enrichment plot
library(dendextend) # customizing tree

# enrich.net2, moduleNetwork
library(igraph)

# Stringdb_geneList, StringDB_GO_enrichmentData, stringDB_network1
# StringDB_network_link
library(STRINGdb, verbose = FALSE)

# gagePathwayData
library(gage, verbose = FALSE) # pathway analysis

# fgseaPathwayData
library(fgsea, verbose = FALSE) # fast GSEA

# ReactomePAPathwayData
library(ReactomePA, verbose = FALSE) # pathway analysis

# KeggImage
library(pathview)

# genomePlot, genomePlotDataPre
library(PREDA, verbose = FALSE) # showing expression on genome
library(PREDAsampledata, verbose = FALSE)
library(hgu133plus2.db, verbose = FALSE)

# biclustering
library(biclust, verbose = FALSE)

library(knitr) #  install if needed. for showing tables with kable
library(kableExtra)

if (input_biclustMethod_ == "BCQU()") {
  library(QUBIC, verbose = FALSE)
} # have trouble installing on Linux
if (input_biclustMethod_ == "BCUnibic()") {
  library(runibic, verbose = FALSE)
} # Test biclustMethod dependant qubic runibic

# wgcna
library(WGCNA)
library(flashClust, verbose = FALSE)
source("iDEP_core_functions_only.R")
# Palette of 20 rainbow colours: the colours are sorted and then picked in a
# fixed, hand-chosen order.
mycolors_ <- local({
  sortedRainbow <- sort(rainbow(20))
  pickOrder <- c(
    1, 20, 10, 11, 2, 19, 3, 12, 4, 13,
    5, 14, 6, 15, 7, 16, 8, 17, 9, 18
  )
  sortedRainbow[pickOrder]
})

# 75-step ramp from blue through pale yellow to red/brown.
hmcols_ <- colorRampPalette(
  colors = c(
    "#4575B4", "#91BFDB", "#E0F3F8", "#FFFFBF",
    "#FEE090", "#FC8D59", "#D73027"
  )
)(75)

# Heatmap colour schemes: each row of this matrix is one 75-colour scheme,
# and the row name is the scheme's label.
heatColors_ <- rbind(
  "Green-Black-Red" = greenred(75),
  "Blue-White-Red" = bluered(75),
  "Green-Black-Magenta" = colorpanel(75, "green", "black", "magenta"),
  "Blue-Yellow-Red" = colorpanel(75, "blue", "yellow", "red"),
  "Blue-white-brown" = hmcols_
)

We are using the downloaded gene expression file in which gene IDs have been converted to Ensembl gene IDs. This is because the ID conversion database is too large to download. You can use your original file if it already uses Ensembl IDs, or if you do not want to use the pathway files available in iDEP (or they are not available).

# Locate the input files of this run inside params$input_folder.
# Matching is done on the lower-cased file name only (not on the directory part
# of the path), and the pattern is a literal string, so the "." in ".csv" is
# not interpreted as a regular-expression wildcard.
inputFolderFiles <- list.files(params$input_folder, full.names = TRUE)
inputFileNames_ <- tolower(basename(inputFolderFiles))

inputFile_ <- inputFolderFiles[
  stringr::str_detect(inputFileNames_, stringr::fixed("expression.csv"))
]
sampleInfoFile_ <- inputFolderFiles[
  stringr::str_detect(inputFileNames_, stringr::fixed("sampleinfo.csv"))
]
gldsMetadataFile_ <- inputFolderFiles[
  stringr::str_detect(inputFileNames_, stringr::fixed("metadata.csv"))
]

# All three files are read unconditionally further down, so fail here with a
# readable message instead of an obscure error inside the reader functions.
inputMatchCounts_ <- c(
  "expression.csv" = length(inputFile_),
  "sampleinfo.csv" = length(sampleInfoFile_),
  "metadata.csv" = length(gldsMetadataFile_)
)
if (any(inputMatchCounts_ == 0L)) {
  stop(
    "No file matching ",
    paste0("'*", names(inputMatchCounts_)[inputMatchCounts_ == 0L], "'",
           collapse = ", "),
    " found in ", params$input_folder,
    call. = FALSE
  )
}
if (any(inputMatchCounts_ > 1L)) {
  warning(
    "More than one file matches ",
    paste0("'*", names(inputMatchCounts_)[inputMatchCounts_ > 1L], "'",
           collapse = ", "),
    " in ", params$input_folder,
    call. = FALSE
  )
}

geneInfoFile_ <- params$geneInfoFile
geneSetFile_ <- params$geneSetFile # pathway database in SQL; can be GMT format

STRING10_speciesFile_ <- "https://raw.githubusercontent.com/iDEP-SDSU/idep/master/shinyapps/idep/STRING10_species.csv"

# Read the GLDS metadata sheet and show it as a scrollable HTML table.
readMetadata.out_ <- readMetadata(inFile = gldsMetadataFile_)

kable(readMetadata.out_) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%")
Col0_GC_Alight_Rep1 Col0_GC_Alight_Rep2 Col0_GC_Alight_Rep3 Col0_GC_dark_Rep1 Col0_GC_dark_Rep2 Col0_GC_dark_Rep3 Ws_GC_Alight_Rep1 Ws_GC_Alight_Rep2 Ws_GC_Alight_Rep3 Ws_GC_dark_Rep1 Ws_GC_dark_Rep2 Ws_GC_dark_Rep3 Col0PhyD_GC_Alight_Rep1 Col0PhyD_GC_Alight_Rep2 Col0PhyD_GC_Alight_Rep3 Col0PhyD_GC_dark_Rep1 Col0PhyD_GC_dark_Rep2 Col0PhyD_GC_dark_Rep3 Col0_FLT_Alight_Rep1 Col0_FLT_Alight_Rep2 Col0_FLT_Alight_Rep3 Col0_FLT_dark_Rep1 Col0_FLT_dark_Rep2 Col0_FLT_dark_Rep3 Ws_FLT_Alight_Rep1 Ws_FLT_Alight_Rep2 Ws_FLT_Alight_Rep3 Ws_FLT_dark_Rep1 Ws_FLT_dark_Rep2 Ws_FLT_dark_Rep3 Col0PhyD_FLT_Alight_Rep1 Col0PhyD_FLT_Alight_Rep2 Col0PhyD_FLT_Alight_Rep3 Col0PhyD_FLT_dark_Rep1 Col0PhyD_FLT_dark_Rep2 Col0PhyD_FLT_dark_Rep3
Sample.LongId Atha.Col.0.root.GC.Alight.Rep1.GSM2493759.RNAseq.RNAseq Atha.Col.0.root.GC.Alight.Rep2.GSM2493760.RNAseq.RNAseq Atha.Col.0.root.GC.Alight.Rep3.GSM2493761.RNAseq.RNAseq Atha.Col.0.root.GC.dark.Rep1.GSM2493768.RNAseq.RNAseq Atha.Col.0.root.GC.dark.Rep2.GSM2493769.RNAseq.RNAseq Atha.Col.0.root.GC.dark.Rep3.GSM2493770.RNAseq.RNAseq Atha.Ws.root.GC.Alight.Rep1.GSM2493762.RNAseq.RNAseq Atha.Ws.root.GC.Alight.Rep2.GSM2493763.RNAseq.RNAseq Atha.Ws.root.GC.Alight.Rep3.GSM2493764.RNAseq.RNAseq Atha.Ws.root.GC.dark.Rep1.GSM2493771.RNAseq.RNAseq Atha.Ws.root.GC.dark.Rep2.GSM2493772.RNAseq.RNAseq Atha.Ws.root.GC.dark.Rep3.GSM2493773.RNAseq.RNAseq Atha.Col.0.PhyD.root.GC.Alight.Rep1.GSM2493765.RNAseq.RNAseq Atha.Col.0.PhyD.root.GC.Alight.Rep2.GSM2493766.RNAseq.RNAseq Atha.Col.0.PhyD.root.GC.Alight.Rep3.GSM2493767.RNAseq.RNAseq Atha.Col.0.PhyD.root.GC.dark.Rep1.GSM2493774.RNAseq.RNAseq Atha.Col.0.PhyD.root.GC.dark.Rep2.GSM2493775.RNAseq.RNAseq Atha.Col.0.PhyD.root.GC.dark.Rep3.GSM2493776.RNAseq.RNAseq Atha.Col.0.root.FLT.Alight.Rep1.GSM2493777.RNAseq.RNAseq Atha.Col.0.root.FLT.Alight.Rep2.GSM2493778.RNAseq.RNAseq Atha.Col.0.root.FLT.Alight.Rep3.GSM2493779.RNAseq.RNAseq Atha.Col.0.root.FLT.dark.Rep1.GSM2493786.RNAseq.RNAseq Atha.Col.0.root.FLT.dark.Rep2.GSM2493787.RNAseq.RNAseq Atha.Col.0.root.FLT.dark.Rep3.GSM2493788.RNAseq.RNAseq Atha.Ws.root.FLT.Alight.Rep1.GSM2493780.RNAseq.RNAseq Atha.Ws.root.FLT.Alight.Rep2.GSM2493781.RNAseq.RNAseq Atha.Ws.root.FLT.Alight.Rep3.GSM2493782.RNAseq.RNAseq Atha.Ws.root.FLT.dark.Rep1.GSM2493789.RNAseq.RNAseq Atha.Ws.root.FLT.dark.Rep2.GSM2493790.RNAseq.RNAseq Atha.Ws.root.FLT.dark.Rep3.GSM2493791.RNAseq.RNAseq Atha.Col.0.PhyD.root.FLT.Alight.Rep1.GSM2493783.RNAseq.RNAseq Atha.Col.0.PhyD.root.FLT.Alight.Rep2.GSM2493784.RNAseq.RNAseq Atha.Col.0.PhyD.root.FLT.Alight.Rep3.GSM2493785.RNAseq.RNAseq Atha.Col.0.PhyD.root.FLT.dark.Rep1.GSM2493792.RNAseq.RNAseq Atha.Col.0.PhyD.root.FLT.dark.Rep2.GSM2493793.RNAseq.RNAseq 
Atha.Col.0.PhyD.root.FLT.dark.Rep3.GSM2493794.RNAseq.RNAseq
Sample.Id
Sample.Name Atha_Col-0_root_GC_Alight_Rep1_GSM2493759 Atha_Col-0_root_GC_Alight_Rep2_GSM2493760 Atha_Col-0_root_GC_Alight_Rep3_GSM2493761 Atha_Col-0_root_GC_dark_Rep1_GSM2493768 Atha_Col-0_root_GC_dark_Rep2_GSM2493769 Atha_Col-0_root_GC_dark_Rep3_GSM2493770 Atha_Ws_root_GC_Alight_Rep1_GSM2493762 Atha_Ws_root_GC_Alight_Rep2_GSM2493763 Atha_Ws_root_GC_Alight_Rep3_GSM2493764 Atha_Ws_root_GC_dark_Rep1_GSM2493771 Atha_Ws_root_GC_dark_Rep2_GSM2493772 Atha_Ws_root_GC_dark_Rep3_GSM2493773 Atha_Col-0-PhyD_root_GC_Alight_Rep1_GSM2493765 Atha_Col-0-PhyD_root_GC_Alight_Rep2_GSM2493766 Atha_Col-0-PhyD_root_GC_Alight_Rep3_GSM2493767 Atha_Col-0-PhyD_root_GC_dark_Rep1_GSM2493774 Atha_Col-0-PhyD_root_GC_dark_Rep2_GSM2493775 Atha_Col-0-PhyD_root_GC_dark_Rep3_GSM2493776 Atha_Col-0_root_FLT_Alight_Rep1_GSM2493777 Atha_Col-0_root_FLT_Alight_Rep2_GSM2493778 Atha_Col-0_root_FLT_Alight_Rep3_GSM2493779 Atha_Col-0_root_FLT_dark_Rep1_GSM2493786 Atha_Col-0_root_FLT_dark_Rep2_GSM2493787 Atha_Col-0_root_FLT_dark_Rep3_GSM2493788 Atha_Ws_root_FLT_Alight_Rep1_GSM2493780 Atha_Ws_root_FLT_Alight_Rep2_GSM2493781 Atha_Ws_root_FLT_Alight_Rep3_GSM2493782 Atha_Ws_root_FLT_dark_Rep1_GSM2493789 Atha_Ws_root_FLT_dark_Rep2_GSM2493790 Atha_Ws_root_FLT_dark_Rep3_GSM2493791 Atha_Col-0-PhyD_root_FLT_Alight_Rep1_GSM2493783 Atha_Col-0-PhyD_root_FLT_Alight_Rep2_GSM2493784 Atha_Col-0-PhyD_root_FLT_Alight_Rep3_GSM2493785 Atha_Col-0-PhyD_root_FLT_dark_Rep1_GSM2493792 Atha_Col-0-PhyD_root_FLT_dark_Rep2_GSM2493793 Atha_Col-0-PhyD_root_FLT_dark_Rep3_GSM2493794
GLDS 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120 120
Accession GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120 GLDS-120
Hardware Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish Petri dish
Tissue Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots Roots
Age 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days 11 days
Organism Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana Arabidopsis thaliana
Ecotype Col-0 Col-0 Col-0 Col-0 Col-0 Col-0 WS-0 WS-0 WS-0 WS-0 WS-0 WS-0 Col-0 Col-0 Col-0 Col-0 Col-0 Col-0 Col-0 Col-0 Col-0 Col-0 Col-0 Col-0 WS-0 WS-0 WS-0 WS-0 WS-0 WS-0 Col-0 Col-0 Col-0 Col-0 Col-0 Col-0
Genotype WT WT WT WT WT WT WT WT WT WT WT WT PhyD PhyD PhyD PhyD PhyD PhyD WT WT WT WT WT WT WT WT WT WT WT WT PhyD PhyD PhyD PhyD PhyD PhyD
Variety Col-0 WT Col-0 WT Col-0 WT Col-0 WT Col-0 WT Col-0 WT WS-0 WT WS-0 WT WS-0 WT WS-0 WT WS-0 WT WS-0 WT Col-0 PhyD Col-0 PhyD Col-0 PhyD Col-0 PhyD Col-0 PhyD Col-0 PhyD Col-0 WT Col-0 WT Col-0 WT Col-0 WT Col-0 WT Col-0 WT WS-0 WT WS-0 WT WS-0 WT WS-0 WT WS-0 WT WS-0 WT Col-0 PhyD Col-0 PhyD Col-0 PhyD Col-0 PhyD Col-0 PhyD Col-0 PhyD
Radiation Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Background Earth Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation Cosmic radiation
Gravity Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Terrestrial Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity Microgravity
Developmental 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots 11 day old seedling roots
Time.series.or.Concentration.gradient Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point Single time point
Light 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light 4-6 umoles m-2 s-1 total light Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown Light-activated for 4 hours, then dark-grown
Analysis.Light Alight Alight Alight Dark Dark Dark Alight Alight Alight Dark Dark Dark Alight Alight Alight Dark Dark Dark Alight Alight Alight Dark Dark Dark Alight Alight Alight Dark Dark Dark Alight Alight Alight Dark Dark Dark
Assay..RNAseq. RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling RNAseq Transcription Profiling
Temperature
Treatment.type
Treatment.intensity X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X
Treament.timing X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X
Preservation.Method. RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater RNAlater
# Read the expression file with the sourced iDEP helper.
# NOTE(review): the numeric option codes below mirror iDEP UI selections; their
# exact meaning is defined in iDEP_core_functions_only.R - confirm there.
readData.out_ <- readData(
  inFile = inputFile_,
  input_dataFileFormat = 1,
  input_missingValue = "geneMedian",
  input_minCounts = 0.5,
  input_NminSamples = 1,
  input_CountsTransform = 1,
  input_countsLogStart = 4
)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# Preview the head of the processed data matrix as a scrollable HTML table.
scroll_box(
  kable_styling(
    kable(head(readData.out_$data)),
    bootstrap_options = c("striped", "hover")
  ),
  width = "100%"
)
Col0_GC_Alight_Rep1 Col0_GC_Alight_Rep2 Col0_GC_Alight_Rep3 Col0_GC_dark_Rep1 Col0_GC_dark_Rep2 Col0_GC_dark_Rep3 Ws_GC_Alight_Rep1 Ws_GC_Alight_Rep2 Ws_GC_Alight_Rep3 Ws_GC_dark_Rep1 Ws_GC_dark_Rep2 Ws_GC_dark_Rep3 Col0PhyD_GC_Alight_Rep1 Col0PhyD_GC_Alight_Rep2 Col0PhyD_GC_Alight_Rep3 Col0PhyD_GC_dark_Rep1 Col0PhyD_GC_dark_Rep2 Col0PhyD_GC_dark_Rep3 Col0_FLT_Alight_Rep1 Col0_FLT_Alight_Rep2 Col0_FLT_Alight_Rep3 Col0_FLT_dark_Rep1 Col0_FLT_dark_Rep2 Col0_FLT_dark_Rep3 Ws_FLT_Alight_Rep1 Ws_FLT_Alight_Rep2 Ws_FLT_Alight_Rep3 Ws_FLT_dark_Rep1 Ws_FLT_dark_Rep2 Ws_FLT_dark_Rep3 Col0PhyD_FLT_Alight_Rep1 Col0PhyD_FLT_Alight_Rep2 Col0PhyD_FLT_Alight_Rep3 Col0PhyD_FLT_dark_Rep1 Col0PhyD_FLT_dark_Rep2 Col0PhyD_FLT_dark_Rep3
AT1G07590 14.09484 15.82643 16.00050 13.10404 13.51777 14.64386 13.409439 15.77315 14.82674 8.734572 12.853189 14.49060 14.30307 16.42429 16.25823 14.93434 14.023780 13.652232 13.06820 13.47644 15.85437 14.53257 12.546514 16.29378 13.70761 17.15691 15.84185 11.771773 15.17731 15.44411 15.63005 16.46416 16.53664 16.16638 14.22419 11.227882
AT1G21310 13.82237 12.89321 13.59194 12.69178 11.84638 13.57377 11.609280 14.87147 13.40320 10.803775 11.668910 12.93797 12.21651 15.52722 14.64023 14.21443 13.702426 13.487243 13.86935 13.89505 14.02481 14.15226 13.295761 15.67999 14.01774 15.40720 14.53493 13.029378 14.92947 14.70538 14.61414 13.92094 14.68044 14.58744 13.84959 11.987155
AT2G33830 13.00360 12.70793 13.19989 11.07625 11.44428 12.10500 11.605670 13.01332 13.23629 6.342765 8.041026 10.62547 13.12869 14.63728 13.86356 12.00093 9.571015 8.582187 13.84084 13.54377 14.43678 13.47765 11.086524 14.73928 13.32348 14.63064 13.90582 9.399696 12.33273 12.11427 14.45015 14.83492 14.74970 13.46973 13.62021 13.043726
AT1G07610 12.08104 14.30673 14.23944 10.40031 11.39977 12.30921 9.268934 12.67998 11.15489 4.495592 9.733672 10.63264 11.22988 14.58783 14.11140 12.85493 11.888352 11.502633 10.64115 11.46993 13.77730 12.79971 9.080162 14.60663 10.27736 13.91128 12.58584 8.510065 11.83719 12.25284 13.89512 14.26232 14.34338 13.98226 11.39071 6.943481
AT5G03240 13.20130 13.12291 13.32156 11.49666 11.57663 13.03192 12.702830 13.73541 14.16480 10.261950 10.374225 11.62274 13.07120 14.91590 14.53655 13.45543 11.608011 11.671690 12.73701 13.21551 13.74191 12.73065 10.975705 12.96365 12.31093 14.01545 13.06204 10.566490 12.53392 12.64681 13.26962 14.37130 14.68130 13.65752 12.14785 12.731019
AT2G45960 14.76921 14.16179 14.40573 12.98223 12.07059 13.29898 13.412024 14.52394 14.19108 11.907155 12.186793 12.66575 12.72364 15.00718 14.16692 13.92458 12.483819 12.222020 13.90170 13.63596 14.27782 13.88254 12.522675 13.61255 14.13872 13.88720 14.02430 11.971582 13.47262 13.65144 13.90907 13.79361 14.08793 13.30008 12.89654 12.072701
# Read the sample sheet (the reader is also given the expression data) and
# show it as a scrollable HTML table.
readSampleInfo.out_ <- readSampleInfo(
  inFile = sampleInfoFile_,
  readData.out = readData.out_
)
scroll_box(
  kable_styling(
    kable(readSampleInfo.out_),
    bootstrap_options = c("striped", "hover")
  ),
  width = "100%"
)
Gravity Variety Light
Col0_GC_Alight_Rep1 Terrestrial Col0 WT Alight
Col0_GC_Alight_Rep2 Terrestrial Col0 WT Alight
Col0_GC_Alight_Rep3 Terrestrial Col0 WT Alight
Col0_GC_dark_Rep1 Terrestrial Col0 WT Dark
Col0_GC_dark_Rep2 Terrestrial Col0 WT Dark
Col0_GC_dark_Rep3 Terrestrial Col0 WT Dark
Ws_GC_Alight_Rep1 Terrestrial WS0 WT Alight
Ws_GC_Alight_Rep2 Terrestrial WS0 WT Alight
Ws_GC_Alight_Rep3 Terrestrial WS0 WT Alight
Ws_GC_dark_Rep1 Terrestrial WS0 WT Dark
Ws_GC_dark_Rep2 Terrestrial WS0 WT Dark
Ws_GC_dark_Rep3 Terrestrial WS0 WT Dark
Col0PhyD_GC_Alight_Rep1 Terrestrial Col0 PhyD Alight
Col0PhyD_GC_Alight_Rep2 Terrestrial Col0 PhyD Alight
Col0PhyD_GC_Alight_Rep3 Terrestrial Col0 PhyD Alight
Col0PhyD_GC_dark_Rep1 Terrestrial Col0 PhyD Dark
Col0PhyD_GC_dark_Rep2 Terrestrial Col0 PhyD Dark
Col0PhyD_GC_dark_Rep3 Terrestrial Col0 PhyD Dark
Col0_FLT_Alight_Rep1 Microgravity Col0 WT Alight
Col0_FLT_Alight_Rep2 Microgravity Col0 WT Alight
Col0_FLT_Alight_Rep3 Microgravity Col0 WT Alight
Col0_FLT_dark_Rep1 Microgravity Col0 WT Dark
Col0_FLT_dark_Rep2 Microgravity Col0 WT Dark
Col0_FLT_dark_Rep3 Microgravity Col0 WT Dark
Ws_FLT_Alight_Rep1 Microgravity WS0 WT Alight
Ws_FLT_Alight_Rep2 Microgravity WS0 WT Alight
Ws_FLT_Alight_Rep3 Microgravity WS0 WT Alight
Ws_FLT_dark_Rep1 Microgravity WS0 WT Dark
Ws_FLT_dark_Rep2 Microgravity WS0 WT Dark
Ws_FLT_dark_Rep3 Microgravity WS0 WT Dark
Col0PhyD_FLT_Alight_Rep1 Microgravity Col0 PhyD Alight
Col0PhyD_FLT_Alight_Rep2 Microgravity Col0 PhyD Alight
Col0PhyD_FLT_Alight_Rep3 Microgravity Col0 PhyD Alight
Col0PhyD_FLT_dark_Rep1 Microgravity Col0 PhyD Dark
Col0PhyD_FLT_dark_Rep2 Microgravity Col0 PhyD Dark
Col0PhyD_FLT_dark_Rep3 Microgravity Col0 PhyD Dark
# Gene ID conversion is switched off for this run, so there is no conversion
# result to pass on (converted.out_ stays NULL).
input_noIDConversion_ <- TRUE
converted.out_ <- NULL

allGeneInfo.out_ <- geneInfo(fileName = geneInfoFile_)

convertedData.out_ <- convertedData(
  readData.out = readData.out_,
  converted.out = converted.out_,
  input_noIDConversion = input_noIDConversion_
)

# Report how many genes passed the filter.
nGenesFilter(
  readData.out = readData.out_,
  convertedData.out = convertedData.out_,
  converted.out = converted.out_,
  input_noIDConversion = input_noIDConversion_
)
## [1] "16156 genes in 36 samples. 16155  genes passed filter.\n Original gene IDs used."
# converted counts, just for compatibility
convertedCounts.out_ <- convertedCounts(
  readData.out = readData.out_,
  converted.out = converted.out_
)

2. Pre-process

# Read counts per library
# Save only the settable graphical parameters. A plain par() snapshot also
# contains read-only entries (cin, cra, csi, cxy, din, page), and restoring it
# raises a 'graphical parameter "..." cannot be set' warning for each of them.
parDefault_ <- par(no.readonly = TRUE)
par(mar = c(12, 4, 2, 2)) # large bottom margin for the perpendicular sample labels
# barplot of total read counts
rawCounts <- readData.out_$rawCounts
groups_ <- as.factor(detectGroups(colnames(rawCounts)))
# One colour per group, unless there is at most one group or more than 20
# groups, in which case every bar is green.
if (nlevels(groups_) <= 1 || nlevels(groups_) > 20) {
  col1_ <- "green"
} else {
  col1_ <- rainbow(nlevels(groups_))[groups_]
}

barplot(colSums(rawCounts) / 1e6,
        col = col1_, las = 3, main = "Total read counts (millions)"
)

readCountsBias(readData.out = readData.out_, readSampleInfo.out = readSampleInfo.out_) # detecting bias in sequencing depth
## [1] 0.05123677
## [1] 0.5460606
## [1] 0.2013552
## [1] 0.3019591
## [1] "No bias detected"
# Box plot
boxplot(
  x = readData.out_$data,
  las = 2, col = col1_,
  ylab = "Transformed expression levels",
  main = "Distribution of transformed data"
)

# Density plot
par(parDefault_) # restore the graphical parameters saved at the top of this section
densityPlot(readData.out = readData.out_, 
            mycolors = mycolors_)

# Scatter plot of the first two samples (columns 1 and 2 of the data matrix),
# with the sample names as axis labels.
plot(
  x = readData.out_$data[, 1:2],
  main = "Scatter plot of first two samples",
  xlab = colnames(readData.out_$data)[1],
  ylab = colnames(readData.out_$data)[2]
)

#### plot gene or gene family
# Both calls search for "HOXA"; the recorded output of each is NULL.
genePlot(
  convertedData.out = convertedData.out_,
  allGeneInfo.out = allGeneInfo.out_,
  input_geneSearch = "HOXA",
  input_selectOrg = "BestMatch"
)
## NULL
geneBarPlotError(
  convertedData.out = convertedData.out_,
  allGeneInfo.out = allGeneInfo.out_,
  input_geneSearch = "HOXA",
  input_selectOrg = "BestMatch",
  input_useSD = "FALSE" # Use standard deviation instead of standard error in error bar?
)
## NULL

3. Heatmap

# hierarchical clustering tree
x <- readData.out_$data
maxGene <- apply(x, 1, max) # highest expression of each gene across samples
# Remove the bottom 25% lowly expressed genes, which inflate the PCC.
# quantile(maxGene)[1] is the 0% quantile (i.e. the minimum) and would only
# drop genes tied with the minimum, so the 25% cutoff is requested explicitly.
x <- x[which(maxGene > quantile(maxGene, probs = 0.25)), ]
plot(as.dendrogram(hclust2(dist2(t(x)))), ylab = "1 - Pearson C.C.", type = "rectangle")

# Correlation matrix
#input_labelPCC_ <- TRUE # Show correlation coefficient?
correlationMatrix(readData.out = readData.out_, input_labelPCC = TRUE)

# Draw the static heatmap into <input_folder>/heatmap.png
# (10 x 15 inches at 300 dpi), then close the PNG device.
png(
  filename = paste(params$input_folder, "heatmap.png", sep = "/"),
  width = 10,
  height = 15,
  units = "in",
  res = 300
)
staticHeatmap(
  readData.out = readData.out_,
  readSampleInfo.out = readSampleInfo.out_,
  heatColors = heatColors_,
  input_heatColors1 = 1,
  input_nGenes = 1000,
  input_heatmapCutoff = 4,
  input_geneCentering = TRUE,
  input_sampleCentering = FALSE,
  input_geneNormalize = FALSE,
  input_sampleNormalize = FALSE,
  input_noSampleClustering = FALSE,
  input_distFunctions = 1,
  input_hclustFunctions = 1,
  input_selectFactorsHeatmap = "Gravity"
)
dev.off()
## quartz_off_screen 
##                 2

![heatmap](heatmap.png)

# interactive heatmap using Plotly
heatmapPlotly(
  convertedData.out = convertedData.out_,
  allGeneInfo.out = allGeneInfo.out_,
  heatColors = heatColors_,
  input_heatColors1 = 1,
  input_geneCentering = TRUE,
  input_sampleCentering = FALSE,
  input_geneNormalize = FALSE,
  input_sampleNormalize = FALSE
)

4. K-means clustering

# Distribution of standard deviations
distributionSD(
  convertedData.out = convertedData.out_,
  input_nGenesKNN = 2000
)

# Number of clusters
KmeansNclusters(
  convertedData.out = convertedData.out_,
  input_nGenesKNN = 2000
)

# Running K-means
Kmeans.out_ <- Kmeans(
  convertedData.out = convertedData.out_,
  input_nGenesKNN = 2000,
  maxGeneClustering = 12000,
  input_nClusters = 4,
  input_kmeansNormalization = "geneMean",
  input_KmeansReRun = 0
)

# Heatmap for k-Means
KmeansHeatmap(
  Kmeans.out = Kmeans.out_,
  .mycolors = mycolors_,
  .heatColors = heatColors_,
  .input_heatColors1 = 1
)

# Read gene sets for enrichment analysis
GeneSets.out_ <- readGeneSets(
  fileName = geneSetFile_,
  convertedData = convertedData.out_,
  selectOrg = "NEW",
  GO = "GOBP",
  myrange = c(15, 2000)
)

# Alternatively, users can use their own GMT files by
# GeneSets.out_ <- readGMTRobust('somefile.GMT')

# Enrichment analysis for k-Means clusters
results <- KmeansGO(
  Kmeans.out = Kmeans.out_,
  GeneSets.out = GeneSets.out_,
  input_nClusters = 4
)

# Format the adjusted p-values to 3 significant digits for display.
results$adj.Pval <- format(results$adj.Pval, digits = 3)

scroll_box(
  kable_styling(
    kable(results, row.names = FALSE),
    bootstrap_options = c("striped", "hover")
  ),
  width = "100%"
)
Cluster adj.Pval Genes Pathways
A 2.25e-169 236 Amide biosynthetic process
7.51e-167 225 Translation
1.76e-166 225 Peptide biosynthetic process
2.01e-161 313 Organonitrogen compound biosynthetic process
2.03e-159 242 Cellular amide metabolic process
4.21e-158 226 Peptide metabolic process
1.69e-89 137 Ribonucleoprotein complex biogenesis
5.02e-83 206 Cellular component biogenesis
1.94e-71 109 Ribosome biogenesis
1.93e-62 95 Response to cadmium ion
B 1.41e-14 17 Detoxification
2.90e-14 19 Response to toxic substance
2.90e-14 16 Drug catabolic process
2.90e-14 15 Cellular oxidant detoxification
3.83e-14 10 Water transport
3.83e-14 10 Fluid transport
3.83e-14 15 Cellular detoxification
4.93e-14 12 Hydrogen peroxide catabolic process
7.79e-14 15 Cellular response to toxic substance
2.35e-13 20 Response to oxidative stress
C 1.83e-32 129 Response to abiotic stimulus
2.31e-31 83 Response to inorganic substance
4.80e-25 54 Response to metal ion
1.10e-23 88 Cellular catabolic process
4.30e-23 96 Catabolic process
5.18e-23 45 Response to cadmium ion
5.68e-22 93 Response to oxygen-containing compound
7.79e-21 87 Oxidation-reduction process
9.68e-21 88 Cellular response to chemical stimulus
6.58e-19 74 Response to acid chemical
D 1.66e-16 51 Response to abiotic stimulus
1.66e-16 20 Cellular response to decreased oxygen levels
1.66e-16 43 Cellular response to chemical stimulus
1.66e-16 20 Cellular response to oxygen levels
1.66e-16 20 Cellular response to hypoxia
7.26e-16 38 Cellular response to stress
9.22e-16 20 Response to hypoxia
1.07e-15 20 Response to decreased oxygen levels
1.07e-15 20 Response to oxygen levels
2.58e-15 39 Response to external stimulus
# Plot genes using t-SNE
tSNEgenePlot(
  Kmeans.out_,
  mycolors = mycolors_,
  input_colorGenes = TRUE, # Color genes in t-SNE plot?
  input_seedTSNE = 0
)

5. PCA and beyond

# Sample-level projections. Each plot is given the first and second column
# names of the sample sheet as its two factors.
PCAplot(
  convertedData.out = convertedData.out_,
  readSampleInfo.out = readSampleInfo.out_,
  input_selectFactors = colnames(readSampleInfo.out_)[1],
  input_selectFactors2 = colnames(readSampleInfo.out_)[2]
)

MDSplot(
  convertedData.out = convertedData.out_,
  readSampleInfo.out = readSampleInfo.out_,
  input_selectFactors = colnames(readSampleInfo.out_)[1],
  input_selectFactors2 = colnames(readSampleInfo.out_)[2]
)

tSNEplot(
  convertedData.out = convertedData.out_,
  readSampleInfo.out = readSampleInfo.out_,
  input_selectFactors = colnames(readSampleInfo.out_)[1],
  input_selectFactors2 = colnames(readSampleInfo.out_)[2],
  input_tsneSeed2 = 0
)

# Read gene sets for pathway analysis using PGSEA on principal components
GeneSets.out_ <- readGeneSets(
  fileName = geneSetFile_,
  convertedData = convertedData.out_,
  selectOrg = "NEW",
  GO = "GOBP",
  myrange = c(15, 2000)
)

# Run PGSEA analysis
PCApathway(
  convertedData.out = convertedData.out_,
  GeneSets.out = GeneSets.out_
)

# The correlation between PCs with factors
cat(
  PCA2factor(
    readData.out = readData.out_,
    readSampleInfo.out = readSampleInfo.out_
  )
)
## 
##  Correlation between Principal Components (PCs) with factors
## PC1 is correlated with Light (p=5.73e-03).
## PC2 is correlated with Variety (p=2.38e-06).
## PC5 is correlated with Gravity (p=1.07e-05).

6. DEG1

# Differential expression on the first sample-information factor. The
# comparison label, model factor and reference level are all derived from
# readSampleInfo.out_ instead of being hard-coded.
limma.out_ <- limma(
  convertedData.out = convertedData.out_,
  readSampleInfo.out = readSampleInfo.out_,
  input_dataFileFormat = 1,
  input_countsLogStart = 4,
  convertedCounts.out = convertedCounts.out_,
  input_CountsDEGMethod = 3,
  input_limmaPval = 0.1,
  input_limmaFC = 2,
  # Formatted as "group: control vs. mutant",
  # e.g. "Gravity: Microgravity vs. Terrestrial"
  input_selectModelComprions = paste0(
    colnames(readSampleInfo.out_)[1], ": ",
    unique(readSampleInfo.out_[, 1])[1], " vs. ",
    unique(readSampleInfo.out_[, 1])[2]
  ),
  input_selectFactorsModel = colnames(readSampleInfo.out_)[1], # e.g. "Gravity"
  input_selectInteractions = NULL,
  input_selectBlockFactorsModel = NULL,
  # Formatted as "factor:level", e.g. "Gravity:Terrestrial"
  factorReferenceLevels.out = paste(
    colnames(readSampleInfo.out_)[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = ":"
  )
)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# Build the DEG data from the model fit, the converted data and the gene
# annotation
DEG.data.out_ <- DEG.data(
  limma.out = limma.out_,
  convertedData.out = convertedData.out_,
  allGeneInfo.out = allGeneInfo.out_
)

# Show the comparisons stored in the model fit
limma.out_$comparisons
## [1] "Terrestrial-Microgravity"
# Number of comparisons shown in the Venn diagram: capped at
# params$nVennGroupsMax; when there are fewer comparisons, all are included.
nVennGroups <- min(params$nVennGroupsMax, length(limma.out_$comparisons))
vennPlot(
  limma.out = limma.out_,
  # seq_len() gives an empty index when nVennGroups is 0, whereas
  # 1:nVennGroups would evaluate to c(1, 0) and select a non-existent entry
  input_selectComparisonsVenn = limma.out_$comparisons[seq_len(nVennGroups)],
  input_UpDownRegulated = FALSE # Split up and down regulated genes
)

sigGeneStats(limma.out_) # number of DEGs as figure

sigGeneStatsTable(limma.out_) # number of DEGs as table
##                                       Comparisons Up Down
## Terrestrial-Microgravity Terrestrial-Microgravity 50   77

7. DEG2

# input_selectContrast_ <- "Microgravity-Terrestrial" # Selected comparisons
# Collect the heatmap data for the DEGs of the selected comparison; the
# contrast is assembled from the first two distinct values of the first
# sample-information column.
selectedHeatmap.data.out_ <- selectedHeatmap.data(
  convertedData.out = convertedData.out_,
  readSampleInfo.out = readSampleInfo.out_,
  limma.out = limma.out_,
  .converted.out = NULL,
  .readData.out = readData.out_,
  .input_noIDConversion = TRUE,
  input_dataFileFormat = 1,
  input_CountsDEGMethod = 3,
  # Formatted as "group: control vs. mutant",
  # e.g. "Gravity: Microgravity vs. Terrestrial"
  input_selectModelComprions = paste0(
    colnames(readSampleInfo.out_)[1], ": ",
    unique(readSampleInfo.out_[, 1])[1], " vs. ",
    unique(readSampleInfo.out_[, 1])[2]
  ),
  input_selectFactorsModel = colnames(readSampleInfo.out_)[1], # e.g. "Gravity"
  # Formatted as "factor:level", e.g. "Gravity:Terrestrial"
  factorReferenceLevels.out = paste(
    colnames(readSampleInfo.out_)[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = ":"
  ),
  # Formatted as "level-level", e.g. "Microgravity-Terrestrial"
  input_selectContrast = paste(
    unique(readSampleInfo.out_[, 1])[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = "-"
  )
)

# Heatmap for DEGs in the selected comparison
selectedHeatmap(
  selectedHeatmap.data.out = selectedHeatmap.data.out_,
  .mycolors = mycolors_,
  .heatColors = heatColors_
)

# Save gene lists and data into files under params$input_folder.
# Output paths are built with file.path() rather than paste(sep = "/").

# Genes of the heatmap data for the selected comparison. This call uses
# converted.out_ and input_noIDConversion_, unlike the call that produced
# selectedHeatmap.data.out_, so it is evaluated separately.
write.csv(
  selectedHeatmap.data(
    convertedData.out = convertedData.out_,
    readSampleInfo.out = readSampleInfo.out_,
    .converted.out = converted.out_,
    .readData.out = readData.out_,
    .input_noIDConversion = input_noIDConversion_,
    input_dataFileFormat = 1,
    input_CountsDEGMethod = 3,
    # Formatted as "group: control vs. mutant",
    # e.g. "Gravity: Microgravity vs. Terrestrial"
    input_selectModelComprions = paste0(
      colnames(readSampleInfo.out_)[1], ": ",
      unique(readSampleInfo.out_[, 1])[1], " vs. ",
      unique(readSampleInfo.out_[, 1])[2]
    ),
    input_selectFactorsModel = colnames(readSampleInfo.out_)[1], # e.g. "Gravity"
    # Formatted as "factor:level", e.g. "Gravity:Terrestrial"
    factorReferenceLevels.out = paste(
      colnames(readSampleInfo.out_)[1],
      unique(readSampleInfo.out_[, 1])[2],
      sep = ":"
    ),
    input_selectContrast = paste(
      unique(readSampleInfo.out_[, 1])[1],
      unique(readSampleInfo.out_[, 1])[2],
      sep = "-"
    ),
    limma.out = limma.out_
  )$genes,
  file.path(params$input_folder, "heatmap.data.csv")
)

# DEG.data.out_ was computed earlier from the same limma.out_,
# convertedData.out_ and allGeneInfo.out_, so it is written directly
# instead of running DEG.data() a second time.
write.csv(
  DEG.data.out_,
  file.path(params$input_folder, "DEG.data.csv")
)

# All gene lists in GMT format
write(
  AllGeneListsGMT(limma.out_),
  file.path(params$input_folder, "AllGeneListsGMT.gmt")
)
input_selectGO2_ <- "GOBP" # Gene set category

# Gene lists for the selected contrast, using the same cutoff values
# (0.1 and 2) that were passed to limma()
geneListData.out_ <- geneListData(
  limma.out = limma.out_,
  allGeneInfo.out = allGeneInfo.out_,
  input_selectGO2 = "GOBP",
  input_selectOrg = "NEW",
  input_limmaPval = 0.1,
  input_limmaFC = 2,
  # Formatted as "level-level", e.g. "Microgravity-Terrestrial"
  input_selectContrast = paste(
    unique(readSampleInfo.out_[, 1])[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = "-"
  )
)

# Volcano plot for the same contrast and cutoffs
volcanoPlot(
  limma.out = limma.out_,
  input_limmaPval = 0.1,
  input_limmaFC = 2,
  # Formatted as "level-level", e.g. "Microgravity-Terrestrial"
  input_selectContrast = paste(
    unique(readSampleInfo.out_[, 1])[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = "-"
  )
)

# Scatter plot for the selected contrast
scatterPlot(
  limma.out = limma.out_,
  convertedData.out = convertedData.out_,
  readSampleInfo.out = readSampleInfo.out_,
  input_dataFileFormat = 1,
  input_CountsDEGMethod = 3,
  input_limmaPval = 0.1,
  input_limmaFC = 2,
  # Formatted as "group: control vs. mutant",
  # e.g. "Gravity: Microgravity vs. Terrestrial"
  input_selectModelComprions = paste0(
    colnames(readSampleInfo.out_)[1], ": ",
    unique(readSampleInfo.out_[, 1])[1], " vs. ",
    unique(readSampleInfo.out_[, 1])[2]
  ),
  input_selectFactorsModel = colnames(readSampleInfo.out_)[1], # e.g. "Gravity"
  # Formatted as "factor:level", e.g. "Gravity:Terrestrial"
  factorReferenceLevels.out = paste(
    colnames(readSampleInfo.out_)[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = ":"
  ),
  input_selectContrast = paste(
    unique(readSampleInfo.out_[, 1])[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = "-"
  )
)

# MA plot for the same contrast; additionally receives the converted IDs
# and the raw read data
MAplot(
  limma.out = limma.out_,
  convertedData.out = convertedData.out_,
  readSampleInfo.out = readSampleInfo.out_,
  .converted.out = converted.out_,
  .readData.out = readData.out_,
  .input_noIDConversion = TRUE,
  input_dataFileFormat = 1,
  input_CountsDEGMethod = 3,
  input_limmaPval = 0.1,
  input_limmaFC = 2,
  # Formatted as "group: control vs. mutant",
  # e.g. "Gravity: Microgravity vs. Terrestrial"
  input_selectModelComprions = paste0(
    colnames(readSampleInfo.out_)[1], ": ",
    unique(readSampleInfo.out_[, 1])[1], " vs. ",
    unique(readSampleInfo.out_[, 1])[2]
  ),
  input_selectFactorsModel = colnames(readSampleInfo.out_)[1], # e.g. "Gravity"
  # Formatted as "factor:level", e.g. "Gravity:Terrestrial"
  factorReferenceLevels.out = paste(
    colnames(readSampleInfo.out_)[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = ":"
  ),
  input_selectContrast = paste(
    unique(readSampleInfo.out_[, 1])[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = "-"
  )
)

# Enrichment table for the heatmap gene lists, built from the gene sets
# loaded so far
geneListGOTable.out_ <- geneListGOTable(
  GeneSets.out = GeneSets.out_,
  minGenesEnrichment = 2,
  selectedHeatmap.data.out = selectedHeatmap.data.out_
)

# Read pathway data again
GeneSets.out_ <- readGeneSets(
  fileName = geneSetFile_,
  convertedData = convertedData.out_,
  GO = "GOBP",
  selectOrg = "NEW",
  myrange = c(15, 2000)
)

# input_removeRedudantSets_ <- TRUE # Remove highly redundant gene sets?
# Enrichment analysis
results <- geneListGO(
  geneListGOTable.out = geneListGOTable.out_,
  input_removeRedudantSets = TRUE
)

# Show adjusted P values with 3 significant digits
results[["adj.Pval"]] <- format(results[["adj.Pval"]], digits = 3)

kable(results, row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%")
Direction adj.Pval nGenes Pathways
Down regulated 2.0e-06 9 Phosphorelay signal transduction system
5.4e-06 20 Response to organic substance
7.0e-06 18 Response to endogenous stimulus
7.0e-06 18 Response to hormone
1.7e-05 17 Response to oxygen-containing compound
2.0e-05 9 Response to organic cyclic compound
8.9e-05 8 Response to antibiotic
1.4e-04 16 System development
1.6e-04 10 Intracellular signal transduction
1.6e-04 15 Cellular response to chemical stimulus
Up regulated 3.2e-06 6 Flavonoid metabolic process
1.4e-05 16 Response to abiotic stimulus
1.4e-05 5 Flavonoid biosynthetic process
1.6e-05 6 Circadian rhythm
1.6e-05 8 Response to auxin
1.9e-05 6 Rhythmic process
1.5e-04 9 Response to light stimulus
1.8e-04 9 Response to radiation
5.3e-04 3 Neutral amino acid transport
5.8e-04 4 Response to UV-B

STRING-db API access. We need to find the taxonomy ID of your species, which is used by STRING. First we try to guess the ID based on iDEP’s database. Users can also skip this step and assign the NCBI taxonomy ID directly, e.g. findTaxonomyID.out_ = 10090 (mouse: 10090, human: 9606, etc.).

# Load the STRING species table and look the species up by official name
STRING10_species_ <- read.csv(STRING10_speciesFile_)
ix <- grep(
  pattern = "Arabidopsis thaliana",
  x = STRING10_species_$official_name
)
# The taxonomy ID is taken from the first column of the matching row(s)
findTaxonomyID.out_ <- STRING10_species_[ix, 1]
findTaxonomyID.out_
## [1] 3702

Enrichment analysis using STRING

# Convert gene lists for use with STRING
STRINGdb_geneList.out_ <- STRINGdb_geneList(
  geneListData.out = geneListData.out_,
  findTaxonomyID.out = findTaxonomyID.out_
)
## Warning:  we couldn't map to STRING 0% of your identifiers
# input_STRINGdbGO_ <- "Process"
# Categories: 'Process', 'Component', 'Function', 'KEGG', 'Pfam', 'InterPro'
# Enrichment using STRING
results <- stringDB_GO_enrichmentData(
  selectedHeatmap.data.out = selectedHeatmap.data.out_,
  minGenesEnrichment = 2,
  input_STRINGdbGO = "Process",
  findTaxonomyID.out = findTaxonomyID.out_,
  STRINGdb_geneList.out = STRINGdb_geneList.out_
)
## Warning in string_db$get_enrichment(ids, category = input_STRINGdbGO, methodMT =
## "fdr", : methodMT parameter is depecated. Only FDR correction is available.
## Warning in string_db$get_enrichment(ids, category = input_STRINGdbGO, methodMT =
## "fdr", : iea parameter is deprecated.
## [1] "Process"
## Warning in string_db$get_enrichment(ids, category = input_STRINGdbGO, methodMT =
## "fdr", : methodMT parameter is depecated. Only FDR correction is available.

## Warning in string_db$get_enrichment(ids, category = input_STRINGdbGO, methodMT =
## "fdr", : iea parameter is deprecated.
## [1] "Process"
# When STRING finds nothing significant, `results` holds only a
# "No significant enrichment found." placeholder with no adj.Pval column.
# Format the column only when it exists, so the placeholder table does not
# gain a spurious adj.Pval column.
if ("adj.Pval" %in% names(results)) {
  results$adj.Pval <- format(results$adj.Pval, digits = 3)
}

kable(results, row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%")
“No significant enrichment found.” adj.Pval
No significant enrichment found. NULL

PPI network retrieval and analysis

# Show PPI network
stringDB_network1(
  geneLists = 1,
  input_nGenesPPI = 100,
  findTaxonomyID.out = findTaxonomyID.out_,
  STRINGdb_geneList.out = STRINGdb_geneList.out_
)

Generating interactive PPI

# Write the value returned by stringDB_network_link() to PPI_results.html
# in params$input_folder
write(
  stringDB_network_link(
    input_nGenesPPI = 100,
    findTaxonomyID.out = findTaxonomyID.out_,
    STRINGdb_geneList.out = STRINGdb_geneList.out_,
    geneListData.out = geneListData.out_
  ),
  paste(params$input_folder, "PPI_results.html", sep = "/")
)
## Warning: 'string_db$get_link' is deprecated.
## Use 'Contact developers to request functionality' instead.
## See help("Deprecated")
## Warning:  we couldn't map to STRING 0% of your identifiers
## Warning: 'string_db$get_link' is deprecated.
## Use 'Contact developers to request functionality' instead.
## See help("Deprecated")

## Warning: 'string_db$get_link' is deprecated.
## Use 'Contact developers to request functionality' instead.
## See help("Deprecated")

8. Pathway analysis

# Read pathway data again
GeneSets.out_ <- readGeneSets(
  fileName = geneSetFile_,
  convertedData = convertedData.out_,
  GO = "GOBP",
  selectOrg = "NEW",
  myrange = c(15, 2000)
)

# Pathway analysis using GAGE
gagePathwayData.out_ <- gagePathwayData(
  limma.out = limma.out_,
  input_minSetSize = 15,
  input_maxSetSize = 2000,
  # Formatted as "level-level", e.g. "Microgravity-Terrestrial"
  input_selectContrast1 = paste(
    unique(readSampleInfo.out_[, 1])[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = "-"
  ),
  input_pathwayPvalCutoff = 0.2,
  input_nPathwayShow = 30,
  input_absoluteFold = FALSE,
  input_GenePvalCutoff = 1,
  GeneSets.out = GeneSets.out_
)

# Tabulate the GAGE results, with adjusted P values shown to 3 digits
results <- gagePathwayData.out_
results[["adj.Pval"]] <- format(results[["adj.Pval"]], digits = 3)
kable(results, row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%")
Direction GAGE analysis: Terrestrial vs Microgravity statistic Genes adj.Pval
Down Cellular response to decreased oxygen levels -7.4246 176 3.4e-10
Cellular response to oxygen levels -7.4246 176 3.4e-10
Cellular response to hypoxia -7.3912 175 3.4e-10
Response to hypoxia -7.0149 197 1.8e-09
Response to oxygen levels -6.991 201 1.8e-09
Response to decreased oxygen levels -6.9904 200 1.8e-09
Root epidermal cell differentiation -4.1893 115 5.4e-03
Plant epidermal cell differentiation -4.0288 132 7.7e-03
Response to toxic substance -4.0187 275 7.7e-03
Trichoblast differentiation -3.938 102 1.1e-02
Cell maturation -3.6762 96 2.2e-02
Trichoblast maturation -3.6762 96 2.2e-02
Root hair cell differentiation -3.6762 96 2.2e-02
Anatomical structure maturation -3.5015 151 3.2e-02
Response to antibiotic -3.4886 255 3.2e-02
Antibiotic metabolic process -3.4875 188 3.2e-02
Antibiotic catabolic process -3.455 78 3.8e-02
Response to drug -3.4042 482 3.8e-02
Cellular response to toxic substance -3.3863 148 3.8e-02
Response to reactive oxygen species -3.3836 145 3.8e-02
Up Photosynthesis 5.401 223 1.1e-04
Ribonucleoprotein complex biogenesis 5.01 438 3.5e-04
RNA modification 4.8591 321 4.9e-04
Ribosome biogenesis 4.7339 343 7.0e-04
NcRNA metabolic process 4.2086 425 5.6e-03
NcRNA processing 4.1143 357 7.2e-03
Cellular response to DNA damage stimulus 3.7807 337 2.3e-02
RRNA processing 3.7162 239 2.5e-02
DNA repair 3.7091 314 2.5e-02
RRNA metabolic process 3.6563 244 2.8e-02
# Pathway list for the GAGE results (input_pathwayMethod = 1)
pathwayListData.out_ <- pathwayListData(
  allGeneInfo.out = allGeneInfo.out_,
  input_selectOrg = "NEW",
  input_selectGO = "GOBP",
  input_pathwayMethod = 1,
  gagePathwayData.out = gagePathwayData.out_,
  fgseaPathwayData.out = fgseaPathwayData.out_,
  GeneSets.out = GeneSets.out_
)

# The pathway list must be bound to enrichedTerms and the margin to
# rightMargin, as in the fgsea section of this report. Passing the number as
# enrichedTerms made the call return NULL instead of drawing the plot.
enrichmentPlot(
  enrichedTerms = pathwayListData.out_,
  rightMargin = 25,
  mycolors = mycolors_
)
## NULL
# Network view of the GAGE pathway list
enrichmentNetwork(pathwayListData.out_)

# Same pathway list; presumably the interactive (plotly) version of the network
enrichmentNetworkPlotly(pathwayListData.out_)

## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# input_pathwayMethod_ <- 3 # method code used for fgsea here (the GAGE section uses 1)

# Pathway analysis using fgsea
fgseaPathwayData.out_ <- fgseaPathwayData(
  limma.out = limma.out_,
  input_minSetSize = 15,
  input_maxSetSize = 2000,
  # Formatted as "level-level", e.g. "Microgravity-Terrestrial"
  input_selectContrast1 = paste(
    unique(readSampleInfo.out_[, 1])[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = "-"
  ),
  input_pathwayPvalCutoff = 0.2,
  input_absoluteFold = FALSE,
  input_nPathwayShow = 30,
  input_GenePvalCutoff = 1,
  GeneSets.out = GeneSets.out_
)
## Warning in fgsea(pathways = gmt, stats = fold, minSize = input_minSetSize, :
## You are trying to run fgseaSimple. It is recommended to use fgseaMultilevel. To
## run fgseaMultilevel, you need to remove the nperm argument in the fgsea function
## call.
# Tabulate the fgsea results, with adjusted P values shown to 3 digits
results <- fgseaPathwayData.out_
results[["adj.Pval"]] <- format(results[["adj.Pval"]], digits = 3)
kable(results, row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%")
Direction GSEA analysis: Terrestrial vs Microgravity NES Genes adj.Pval
Down Cellular response to decreased oxygen levels -2.4389 176 6.8e-03
Cellular response to oxygen levels -2.4389 176 6.8e-03
Cellular response to hypoxia -2.4354 175 6.8e-03
Response to hypoxia -2.351 197 6.8e-03
Response to decreased oxygen levels -2.3453 200 6.8e-03
Response to oxygen levels -2.3434 201 6.8e-03
Trichoblast differentiation -2.2099 102 6.8e-03
Root epidermal cell differentiation -2.2039 115 6.8e-03
Plant epidermal cell differentiation -2.1582 132 6.8e-03
Cell maturation -2.1498 96 6.8e-03
Trichoblast maturation -2.1498 96 6.8e-03
Root hair cell differentiation -2.1498 96 6.8e-03
Root hair cell development -2.0882 78 6.8e-03
Root hair elongation -2.0431 58 8.7e-03
Response to hydrogen peroxide -1.9985 65 8.7e-03
Indole glucosinolate metabolic process -1.9908 27 8.7e-03
Up Photosynthesis 2.3365 223 6.8e-03
Flavonoid metabolic process 2.2952 68 6.8e-03
Flavonoid biosynthetic process 2.2846 50 6.8e-03
Response to UV-B 2.2808 59 6.8e-03
Response to UV 2.186 95 6.8e-03
Regulation of flavonoid biosynthetic process 2.1376 16 6.8e-03
Regulation of anthocyanin metabolic process 2.046 19 1.3e-02
Response to gravity 2.0446 78 6.8e-03
Photosynthesis, light reaction 2.0175 119 6.8e-03
Anthocyanin-containing compound metabolic process 1.9915 35 2.2e-02
Response to far red light 1.9865 45 2.2e-02
Regulation of response to red or far red light 1.9849 34 2.2e-02
Anthocyanin-containing compound biosynthetic process 1.984 20 2.2e-02
Response to red light 1.9683 55 1.9e-02
# Pathway list for the fgsea results (input_pathwayMethod = 3)
pathwayListData.out_ <- pathwayListData(
  allGeneInfo.out = allGeneInfo.out_,
  input_selectOrg = "NEW",
  input_selectGO = "GOBP",
  input_pathwayMethod = 3,
  gagePathwayData.out = gagePathwayData.out_,
  fgseaPathwayData.out = fgseaPathwayData.out_,
  GeneSets.out = GeneSets.out_
)

# Enrichment plot of the pathway list
enrichmentPlot(
  enrichedTerms = pathwayListData.out_,
  rightMargin = 25,
  mycolors = mycolors_
)

# Network views of the same pathway list
enrichmentNetwork(pathwayListData.out_)

enrichmentNetworkPlotly(pathwayListData.out_)

# Pathway analysis using PGSEA for the selected contrast
PGSEAplot(
  convertedData.out = convertedData.out_,
  readSampleInfo.out = readSampleInfo.out_,
  input_selectOrg = "NEW",
  input_dataFileFormat = 1,
  input_selectGO = "GOBP",
  input_minSetSize = 15,
  input_maxSetSize = 2000,
  input_CountsDEGMethod = 3,
  # Formatted as "group: control vs. mutant",
  # e.g. "Gravity: Microgravity vs. Terrestrial"
  input_selectModelComprions = paste0(
    colnames(readSampleInfo.out_)[1], ": ",
    unique(readSampleInfo.out_[, 1])[1], " vs. ",
    unique(readSampleInfo.out_[, 1])[2]
  ),
  input_selectFactorsModel = colnames(readSampleInfo.out_)[1], # e.g. "Gravity"
  # Formatted as "factor:level", e.g. "Gravity:Terrestrial"
  factorReferenceLevels.out = paste(
    colnames(readSampleInfo.out_)[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = ":"
  ),
  # Contrasts are "level-level" everywhere else in this report (GAGE, fgsea,
  # volcano, genome plot); ":" is only the separator for reference levels,
  # so the contrast is joined with "-" here as well.
  input_selectContrast1 = paste(
    unique(readSampleInfo.out_[, 1])[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = "-"
  ),
  input_pathwayPvalCutoff = 0.2,
  input_nPathwayShow = 30,
  GeneSets.out = GeneSets.out_
)
## 
## Computing P values using ANOVA

9. Chromosome

# input_selectContrast2_ <- "Microgravity-Terrestrial" # select Comparison
# input_selectContrast2 = limma.out_$comparisons[3] # manually set
# input_limmaPvalViz_ <- 0.1 # FDR to filter genes
# input_limmaFCViz_ <- 2 # fold-change to filter genes

# Show fold-changes on the genome for the selected contrast
genomePlotly(
  limma.out = limma.out_,
  allGeneInfo.out = allGeneInfo.out_,
  input_selectContrast2 = paste(
    unique(readSampleInfo.out_[, 1])[1],
    unique(readSampleInfo.out_[, 1])[2],
    sep = "-"
  ),
  input_limmaPvalViz = 0.1,
  input_limmaFCViz = 2
)
## Warning in eval(quote(list(...)), env): NAs introduced by coercion
## Warning in genomePlotly(limma.out = limma.out_, allGeneInfo.out =
## allGeneInfo.out_, : NAs introduced by coercion

10. Biclustering

# Run the biclustering analysis with the "BCCC()" method
biclustering.out_ <- biclustering(
  convertedData.out = convertedData.out_,
  input_nGenesBiclust = 1000,
  input_biclustMethod = "BCCC()"
)

input_selectBicluster_ <- 1 # select a cluster

# Heatmap for the selected cluster
biclustHeatmap(
  biclustering.out = biclustering.out_,
  heatColors = heatColors_,
  input_heatColors1 = 1,
  input_selectBicluster = 1
)

# input_selectGO4_ <- "GOBP" # Gene set category
# Read pathway data again
GeneSets.out_ <- readGeneSets(
  fileName = geneSetFile_,
  convertedData = convertedData.out_,
  GO = "GOBP",
  selectOrg = "NEW",
  myrange = c(15, 2000)
)

# Enrichment analysis for the selected bicluster
results <- geneListBclustGO(
  minGenesEnrichment = 2,
  input_selectBicluster = 1,
  biclustering.out = biclustering.out_,
  GeneSets.out = GeneSets.out_
)

# Show adjusted P values with 3 significant digits
results[["adj.Pval"]] <- format(results[["adj.Pval"]], digits = 3)

kable(results, row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%")
adj.Pval Genes Pathways
1.9e-120 192 Peptide metabolic process
1.9e-117 182 Translation
3.3e-117 182 Peptide biosynthetic process
8.8e-113 185 Amide biosynthetic process
2.1e-111 196 Cellular amide metabolic process
5.5e-96 238 Organonitrogen compound biosynthetic process
1.4e-68 151 Response to inorganic substance
6.0e-57 101 Response to metal ion
1.3e-53 86 Response to cadmium ion
4.4e-49 197 Response to abiotic stimulus

11. Co-expression network

# Run WGCNA
wgcna.out_ <- wgcna(
  convertedData.out = convertedData.out_,
  maxGeneWGCNA = 2000,
  input_mySoftPower = 5,
  input_nGenesNetwork = 1000,
  input_minModuleSize = 20
)
## Warning: executing %dopar% sequentially: no parallel backend registered
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.7780  1.540          0.918  368.00    378.00  520.0
## 2      2   0.2370  0.285          0.681  189.00    190.00  333.0
## 3      3   0.0936 -0.126          0.651  114.00    108.00  234.0
## 4      4   0.5360 -0.387          0.797   74.60     67.20  176.0
## 5      5   0.7270 -0.553          0.905   52.10     44.20  140.0
## 6      6   0.8430 -0.724          0.971   38.10     30.50  116.0
## 7      7   0.8550 -0.824          0.955   28.80     21.40   98.1
## 8      8   0.9000 -0.922          0.984   22.40     15.90   84.7
## 9      9   0.9090 -0.995          0.969   17.80     12.00   74.2
## 10    10   0.9080 -1.060          0.953   14.40      9.25   65.6
## 11    12   0.9290 -1.130          0.950    9.86      5.67   52.5
## 12    14   0.9370 -1.180          0.941    7.06      3.53   43.0
## 13    16   0.9550 -1.210          0.949    5.24      2.34   35.8
## 14    18   0.9530 -1.220          0.940    4.00      1.54   30.2
## 15    20   0.9730 -1.230          0.965    3.12      1.08   25.8
## TOM calculation: adjacency..
## ..will not use multithreading.
##  Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Soft power curve
softPower(wgcna.out_)

# Modules found by WGCNA, then the module plot
listWGCNA.Modules.out <- listWGCNA.Modules(wgcna.out_)
modulePlot(wgcna.out_)

# Read pathway data again
GeneSets.out_ <- readGeneSets(
  fileName = geneSetFile_,
  convertedData = convertedData.out_,
  GO = "GOBP",
  selectOrg = "NEW",
  myrange = c(15, 2000)
)

# Show network of top genes in selected module
moduleNetwork(
  wgcna.out = wgcna.out_,
  input_noIDConversion = TRUE,
  allGeneInfo.out = allGeneInfo.out_,
  input_selectOrg = "NEW",
  input_selectWGCNA.Module = "Entire network",
  input_topGenesNetwork = 10,
  input_edgeThreshold = 0.4,
  input_selectGO5 = "GOBP"
)
##  softConnectivity: FYI: connecitivty of genes with less than 12 valid samples will be returned as NA.
##  ..calculating connectivities..

# Enrichment analysis of the selected module
results <- networkModuleGO(
  GeneSets.out = GeneSets.out_,
  minGenesEnrichment = 2,
  input_selectWGCNA.Module = "Entire network",
  wgcna.out = wgcna.out_
)

# Show adjusted P values with 3 significant digits
results[["adj.Pval"]] <- format(results[["adj.Pval"]], digits = 3)
kable(results, row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%")
adj.Pval Genes Pathways
1.9e-120 192 Peptide metabolic process
1.9e-117 182 Translation
3.3e-117 182 Peptide biosynthetic process
8.8e-113 185 Amide biosynthetic process
2.1e-111 196 Cellular amide metabolic process
5.5e-96 238 Organonitrogen compound biosynthetic process
1.4e-68 151 Response to inorganic substance
6.0e-57 101 Response to metal ion
1.3e-53 86 Response to cadmium ion
4.4e-49 197 Response to abiotic stimulus